### Replicate all empirical analysis in "Is an Ultimatum the Last Word on Crisis
### Bargaining?" (by Mark Fey and Brenton Kenkel)

library("plyr")
library("broom")
library("dplyr")
library("dummies")
library("foreach")
library("reshape2")
library("stringr")
library("xtable")

## -----------------------------------------------------------------------------
## Data cleaning
## -----------------------------------------------------------------------------

## Load ICB data and remove empty rows
crises <- read.delim("ICB_system_data.txt",
                     stringsAsFactors = FALSE,
                     strip.white = FALSE)
crises <- crises[1:455, ]

## Fix weird crisis name variables (to the extent possible)
crises$cluster1 <- with(crises,
                        str_c(cluster1, cluster2, cluster3, cluster4,
                              cluster5, cluster6))
crises$cluster1 <- str_trim(crises$cluster1)
names(crises)[3] <- "crisname"
for (i in paste0("cluster", 2:6))
    crises[, i] <- NULL

## Recoding of ICB data
crises <- within(crises, {
    ## Code whether a major power was involved in the dispute
    ##
    ## `gpinvtp` is supposed to be coded as 1 for post-1939 cases, but instead
    ## the variable is simply missing for many (strangely not all) of these.
    ## There is no missingness in the other two component variables, so it's
    ## safe to recode all NA's as 0's
    majorPower <- as.numeric(gpinv > 2 | gpinvtp > 2 | powinv > 2)
    majorPower[is.na(majorPower)] <- 0L

    ## Code indicator for "military-security" issues
    ##
    ## It is unclear from the codebook whether "three or more issues" always
    ## includes military-security issues; we assume it does
    milsec <- as.numeric(issues %in% 3:5)

    ## Recode heterogeneity as number of differences and remove missing values
    hetero[hetero == 9] <- NA
    hetero <- hetero - 1

    ## Code geographical relation as factor
    geogrel <- factor(geogrel,
                      levels = 1:3,
                      labels = c("contiguous", "near", "distant"))

    ## Recode system level as binary (mainly or fully dominant system)
    ##
    ## Not using this in the regressions for now, since the major power
    ## indicator should capture most of what we want and degrees of freedom
    ## are a concern
    domsystem <- as.numeric(syslevsy %in% 3:4)

    ## Recode power discrepancy as binary, following Wilkenfield et al (2005)
    powerDis <- as.numeric(powdissy > 1)

    ## Recode extent of satisfication with outcome in the natural way (higher
    ## values mean more satisfied) and remove missing values
    satisfaction <- exsat
    satisfaction[satisfaction > 5] <- NA
    satisfaction <- 6 - satisfaction

    ## Recode interwar disputes as boolean
    interwar <- iwcmb > 2

    ## Recode crisis recurrence as binary and remove missing values
    recurrence <- as.numeric(outesr == 1)
    recurrence[outesr == 3] <- NA

    ## Code crises that end in full-scale war
    war <- as.numeric(viol == 4)
})

## Satisfaction isn't coded in the system-level data for cases with no
## adversarial actor, so we need to bring in the actor-level data
actors <- read.delim("ICB_actor_data.txt",
                     stringsAsFactors = FALSE)

## Convert vector of 'outevl' values to 'satisfaction' (i.e., the reverse of
## 'exsat'), depending on the relative number of satisfied and dissatisfied
## actors
make_sat <- function(x, na.rm = TRUE)
{
    n_sat <- sum(x %in% 1:2, na.rm = na.rm)
    n_dis <- sum(x %in% 3:4, na.rm = na.rm)

    if (n_sat == 0) {
        ## Universal dissatisfaction
        return(1)
    } else if (n_dis == 0) {
        ## Universal satisfaction
        return(5)
    } else if (n_sat > n_dis) {
        ## More satisfied than not
        return(4)
    } else if (n_sat < n_dis) {
        ## More dissatisfied than not
        return(2)
    } else {
        ## Even mix
        return(3)
    }
}

## Infer satisfaction from actor-level data
##
## Doing this for all crises, not just those for which `satisfaction` is
## missing, for validation purposes
actors <- actors %>%
    group_by(crisno) %>%
    select(actor, outevl) %>%
    summarise(satisfaction = make_sat(outevl))

## Validate
print(table(inferred = actors$satisfaction,
            true = crises$satisfaction,
            useNA = "always"))          # Vast majority of cases are along the
                                        # diagonal, so this seems good

## Replace missing values with inferred values
crises$satisfaction <- ifelse(is.na(crises$satisfaction),
                              actors$satisfaction,
                              crises$satisfaction)

## Load dataset of ultimata
ultimata <- read.csv("ultimata-crises-final.csv")

## Code occurrence of an ultimatum
crises <- within(crises, {
    ultimatum <- as.numeric(crisno %in% ultimata$no)
})


## -----------------------------------------------------------------------------
## Cross-tabulation of ultimata and war
## -----------------------------------------------------------------------------

## Drop interwar crises
crises <- filter(crises, !interwar)

## Calculate cross-tab (Table 1 in manuscript)
tab_ult_war <- with(crises,
                    table(ultimatum, war))

print(tab_ult_war)

## Row percentages (in Table 1)
print(format(100 * prop.table(tab_ult_war, 1), digits = 2))

## Perform chi-squared test
test_ult_war <- chisq.test(tab_ult_war)

## Caption with chi-square results (of Table 1)
caption_ult_war <- paste0(
    "Cross-tabulation of \\textit{Ultimatum} and \\textit{War}, sample size $N = 406$.  The test statistic for the $\\chi^2$ test of independence is ",
    format(test_ult_war$statistic, digits = 2),
    " ($p = ",
    format(test_ult_war$p.value, digits = 2),
    "$)."
)
cat(caption_ult_war, "\n")


## -----------------------------------------------------------------------------
## Regression analyses
## -----------------------------------------------------------------------------

icb_formula <- ~ ultimatum + milsec + majorPower + hetero + geogrel + powerDis

war_model <- glm(update(icb_formula, war ~ .),
                 family = binomial(link = "logit"),
                 data = crises)

intensity_model <- glm(update(icb_formula, viol ~ .),
                       family = gaussian(),
                       data = crises)

satisfaction_model <- glm(update(icb_formula, satisfaction ~ .),
                          family = gaussian(),
                          data = crises)

recurrence_model <- glm(update(icb_formula, recurrence ~ .),
                        family = binomial(link = "logit"),
                        data = crises)

## Create regression table
make_table <- function(reg_models) {
    ## Extract dependent variables and give pretty names
    dvs <- sapply(reg_models, function (x) all.vars(formula(x)[[2]]))
    dvs <- plyr::revalue(dvs,
                         c("war" = "War (H1)",
                           "satisfaction" = "Satisfaction (H2)",
                           "viol" = "Intensity",
                           "recurrence" = "Recurrence"))

    ## Extract model types
    types <- sapply(reg_models, function (x) family(x)$family)
    types <- plyr::revalue(types,
                           c("gaussian" = "Linear",
                             "binomial" = "Logit"))

    ## Extract coefficients and standard errors
    results <- lapply(reg_models, tidy)

    ## Set up list of terms, with ultimatum first and intercept last
    ivs <- results[[1]]$term
    which_ult <- which(ivs == "ultimatum")
    which_int <- which(ivs == "(Intercept)")
    ivs <- c(ivs[which_ult],
             ivs[-c(which_ult, which_int)],
             ivs[which_int])

    ## Publication-ready names for independent variables
    iv_names <- c("ultimatum" = "Ultimatum",
                  "milsec" = "Military-Security Issue",
                  "majorPower" = "Major Power Involvement",
                  "hetero" = "Heterogeneity",
                  "geogrelnear" = "Proximity: Near",
                  "geogreldistant" = "Proximity: Distant",
                  "powerDis" = "Power Disparity",
                  "(Intercept)" = "Intercept")

    ## Create header of table
    tab_hdr <- c(paste0("& \\multicolumn{", length(dvs), "}{c}{\\it Dependent Variable} \\\\"),
                 paste0("\\cmidrule{2-", 1 + length(dvs), "}"),
                 paste(paste(c("", dvs), collapse = " & "), "\\\\"))

    ## Create body of table (coefficients on one line, std errors on the next line,
    ## star for statistical significance)
    tab_body <- foreach (iv = ivs, .combine = "c") %do% {
        cf <- sapply(results, function (x) as.numeric(x[x$term == iv, "estimate"]))
        se <- sapply(results, function (x) as.numeric(x[x$term == iv, "std.error"]))
        z <- cf / se
        p <- 2 * pnorm(-1 * abs(z))
        star <- p <= 0.05 & iv != "(Intercept)"

        cf <- sprintf("%.2f", cf)
        cf <- gsub("-", "$-$", cf)
        cf <- ifelse(star, paste0(cf, "$^*$"), cf)

        iv_name <- iv_names[iv]
        iv_name <- paste0("\\multirow{2}{*}{", iv_name, "}")
        cf_row <- c(iv_name, cf)
        cf_row <- paste(cf_row, collapse = " & ")
        cf_row <- paste(cf_row, "\\\\")

        se <- sprintf("%.2f", se)
        se <- paste0("(", se, ")")

        se_row <- c("", se)
        se_row <- paste(se_row, collapse = " & ")
        se_row <- paste(se_row, "\\\\")
        if (iv != tail(ivs, 1))
            se_row <- paste0(se_row, "[0.2em]")

        c(cf_row, se_row)
    }

    ## Table footer: Methods, log-likelihoods and numbers of observations
    meths <- sapply(reg_models, function(x) x$family$family)
    meths <- ifelse(meths == "binomial", "Logit", "Linear")
    meths <- c("Method", meths)
    meths <- paste(meths, collapse = " & ")

    lls <- sapply(reg_models, logLik)
    lls <- sprintf("%.2f", lls)
    lls <- gsub("-", "$-$", lls)
    lls <- c("Log-Likelihood", lls)
    lls <- paste(lls, collapse = " & ")

    ns <- sapply(reg_models, nobs)
    ns <- sprintf("%d", ns)
    ns <- c("Observations", ns)
    ns <- paste(ns, collapse = " & ")

    tab_ftr <- c(paste(meths, "\\\\"),
                 paste(lls, "\\\\"),
                 paste(ns, "\\\\"))

    tab <- c(
        paste(c("\\begin{tabular}{l", rep("c", length(dvs)), "}"), collapse = ""),
        "\\toprule",
        tab_hdr,
        "\\midrule",
        tab_body,
        "\\midrule",
        tab_ftr,
        "\\bottomrule",
        "\\end{tabular}"
    )

    tab
}

## Table 2 of manuscript
tab_main <- make_table(list(war_model, satisfaction_model))
writeLines(tab_main)

## Table 4 of appendix
tab_aux <- make_table(list(recurrence_model, intensity_model))
writeLines(tab_aux)


## -----------------------------------------------------------------------------
## Summary statistics
## -----------------------------------------------------------------------------

## Reduce to relevant data
crises <- filter(crises, !interwar)
crises <- select(crises,
                 ultimatum, milsec, majorPower, hetero, geogrel, powerDis,
                 war, viol, satisfaction, recurrence)

## Summarize variables
summ_crises <- dummy.data.frame(crises) # Convert `geogrel` to dummy variables
                                        # so we can take means/sds
summ_crises <- melt(summ_crises, id.vars = NULL)
summ_crises <- group_by(summ_crises, variable)
summ_crises <- summarise(summ_crises,
                         min = min(value, na.rm = TRUE),
                         max = max(value, na.rm = TRUE),
                         mean = mean(value, na.rm = TRUE),
                         sd = sd(value, na.rm = TRUE))

## Format output
levels(summ_crises$variable) <-
    c("Ultimatum", "Military-Security Issue", "Major Power Involvement",
      "Heterogeneity", "Proximity: Contiguous", "Proximity: Near",
      "Proximity: Distant", "Power Disparity", "War", "Intensity",
      "Satisfaction", "Recurrence")
names(summ_crises) <- c("Variable", "Min", "Max", "Mean", "SD")


## Table 3 of appendix
print(xtable(summ_crises,
             caption =
                 "Summary statistics for all variables used in the analysis of crisis data.",
             label = "tab:summ-crises",
             align = c("l", "l", "c", "c", "c", "c"),
             display = c("s", "s", "d", "d", "f", "f"),
             digits = 2),
      include.rownames = FALSE,
      add.to.row = list(
          pos = list(0, 8),
          command = c(
              "\\hline \\textit{Independent Variables}\\\\",
              "[0.4em] \\textit{Dependent Variables}\\\\"
          )
      ),
      hline.after = c(-1, 12)
      )

